#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

from pymongo import MongoClient


# Connect to MongoDB
def InitMongoDB(MONGODB_HOST, MONGODB_PORT, MONGODB_DB_NAME, MONGODB_COLLECTION_NAME):
    """Create a MongoDB client and look up one collection through it.

    Args:
        MONGODB_HOST: Host passed to ``MongoClient``.
        MONGODB_PORT: Port passed to ``MongoClient``.
        MONGODB_DB_NAME: Name of the database to select on the client.
        MONGODB_COLLECTION_NAME: Name of the collection to select in that
            database.

    Returns:
        A ``(client, collection)`` tuple. The client is handed back alongside
        the collection so the caller can close it when finished.
    """
    connection = MongoClient(MONGODB_HOST, MONGODB_PORT)
    target_collection = connection[MONGODB_DB_NAME][MONGODB_COLLECTION_NAME]
    return connection, target_collection


# Close MongoDB
def CloseMongoDB(MongoDB_Connection):
    """Close a connection by invoking its ``close()`` method.

    Args:
        MongoDB_Connection: Object exposing ``close()``; in this file it is
            the client returned by ``InitMongoDB``.
    """
    shutdown = MongoDB_Connection.close
    shutdown()


def GenGIList(class_name):
    """Write the distinct ``seq_id_gi`` values of one class to a ``.gilist`` file.

    Queries the ``MicrobiomeGene`` collection of the ``Pre_Data`` database on
    ``localhost:27017`` for documents whose ``class`` field equals
    ``class_name`` and that carry a ``seq_id_gi`` field. Every distinct value
    is written on its own line, in the order it is first returned by the
    query, to ``class_name + ".gilist"`` in the current working directory
    (an existing file of that name is overwritten). A progress line is
    printed after every 100 values written.

    Both the output file and the MongoDB client are released even when the
    query or a write raises.

    Args:
        class_name: Value matched against the ``class`` field; also used as
            the stem of the output file name.

    NOTE(review): values are concatenated with a newline before writing, so
    ``seq_id_gi`` is assumed to hold strings -- confirm against the data.
    """
    seen = set()

    client, collection = InitMongoDB("localhost", 27017, "Pre_Data", "MicrobiomeGene")
    try:
        # The context manager guarantees the file is flushed and closed on
        # both the success and the error path.
        with open(class_name + ".gilist", "w") as outfile:
            cursor = collection.find(
                {"class": class_name, "seq_id_gi": {"$exists": 1}},
                {"seq_id_gi": 1, "_id": 0},
            )
            for result in cursor:
                gi = result["seq_id_gi"]
                if gi in seen:
                    continue
                seen.add(gi)
                outfile.write(gi + "\n")
                if len(seen) % 100 == 0:
                    # Flush at each progress checkpoint rather than after
                    # every record: the file on disk stays in step with the
                    # reported count without a system call per line.
                    outfile.flush()
                    print(class_name + ": " + str(len(seen)) + " GIDs added.")
    finally:
        # Always release the client, even if the query or a write failed.
        CloseMongoDB(client)


if __name__ == '__main__':
    # Generate one .gilist file per class value, in this fixed order.
    for target_class in ("Archaea_Bacteria", "Fungi", "Viruses"):
        GenGIList(target_class)
